import scrapy
from workPro.items import WorkproItem


class WorkSpider(scrapy.Spider):
    """Crawl Lagou's Shanghai Python job listings.

    ``parse`` walks each listing page, yielding one request per job posting;
    ``detail_parse`` scrapes the full job description from the posting's
    detail page and emits the completed item.
    """
    name = 'work'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://www.lagou.com/shanghai-zhaopin/Python/']

    # Template for paginated listing pages; %d is the page number.
    url = 'https://www.lagou.com/shanghai-zhaopin/Python/%d/'
    # Next page to fetch; start_urls already covers page 1.
    page_num = 2

    def detail_parse(self, response):
        """Fill in the full job-description text on the item passed via
        request meta, then yield the completed item."""
        item = response.meta['item']
        work_text = response.xpath('//*[@id="job_detail"]//text()').extract()
        item['work_text'] = ''.join(work_text)
        yield item

    def parse(self, response):
        """Extract job name + detail link from each listing entry, follow the
        detail page, then schedule the next listing page (up to page 10)."""
        li_list = response.xpath('//*[@id="s_position_list"]/ul/li')

        for li in li_list:
            item = WorkproItem()
            work_name = li.xpath('./div[1]/div[1]/div[1]/a/h3/text()').extract()
            item['work_name'] = ''.join(work_name)
            detail_url = li.xpath('./div[1]/div[1]/div[1]/a/@href').extract_first()
            # extract_first() returns None when the anchor is missing, and
            # scrapy.Request raises ValueError on a None url — skip such rows.
            if detail_url:
                yield scrapy.Request(url=detail_url, callback=self.detail_parse,
                                     meta={'item': item})

        if self.page_num <= 10:
            # format() around an already %-interpolated string was a no-op.
            new_url = self.url % self.page_num
            self.page_num += 1
            yield scrapy.Request(url=new_url, callback=self.parse)